####################################################################################################
##### Replication File: A Slippery Slope -- The Domestic Diffusion of Ethnic Civil War
##### Authors: Nils-Christian Bormann and Jesse Hammond 
##### Journal: International Studies Quarterly
####################################################################################################
##### Read.me
# The analysis reported below was conducted in R version 3.2.2 "Fire Safety"
# Please note that you need to load the "Cluster_SE.R" and "Prediction.R" functions

####################################################################################################
#### Header 
rm(list=ls())
setwd() # set your working directory

## check libraries and possibly install
libs <- c("plyr", "corrplot", "pROC", "lmtest", "mgcv", "nlme", "MASS", "mvtnorm", "stargazer")
for(i in 1:length(libs)){
  if(libs[1] %in% rownames(installed.packages()) == FALSE) {install.packages(libs[1])}  
}
rm(i, libs)

# load libraries
library(plyr); library(corrplot); library("pROC"); library(lmtest); library(stargazer)

# load additional functions
source("Cluster_SE.R") # clustered standard errors
source("Prediction.R") # simulation of predicted probabilities

####################################################################################################
#### Data
dat <- read.csv("slipslope_replication_data-160426.csv", header=T, sep=",")
names(dat)
dim(dat)
rownames(dat) <- dat$grpyear <- dat$cowgroupid *10000 + dat$year # necessary for cluster function to work

## check data
table(dat$isrelevant) # only relevant groups
table(dat$statusid) # dominant (code=2) and monopoly (code=1) groups dropped
min(dat$ngroups, na.rm=T) # only cases with at least three groups
table(dat$conflictxp) # only group-years in countries with prior conflict

## Variables
# onset_do_flag = UCDP/PRIO ACD 25 battle deaths onset indicator (dropped ongoing) for each ethnic group
# overlap = degree of overlap between conflict zone and previously peaceful ethnic group territory
# mindist_conborder_rel_now = minimum distance to border of active conflict zone
# ctrfight_now = maximum count of ongoing civil war within country (groups do not observe their own rebellion)
# ctrnfights = sum of ongoing civil wars in country (groups do not observe their own rebellion)
# status_excl =  group excluded from executive power
# family_downgraded2 = group downgraded in power status 
# rbal = relative group size
# rbal2 = relative group size squared
# warhist = count of previous civil wars
# ln_mindist_cap = logged minimum distance to capital
# lnngroups = logged count of groups in country
# ln_rgdppc_lag = logged GDP per capita from previous year
# lpopl = logged population count from previuos year
# pyears = time in years since last civil war by group or since independence of state
# pyears2 = peace years squared
# pyears3 = peace years cubed

####################################################################################################
#### Table 1:
# # Model 1: overlap w/o controls
form <- onset_do_flag ~ overlap + pyears + pyears2 + pyears3
m1 <- glm(form, data=dat, family=binomial(link="logit"))
m1cl <- cluster(m1, m1$y, dat$cowid[dat$grpyear%in%rownames(m1$model)])
m1cl[1] 

# # Model 2: overlap + controls relative distance to conflict + controls
form <- onset_do_flag ~ overlap + 
  status_excl + family_downgraded2 + rbal + rbal2 + warhist + ln_mindist_cap + 
  lnngroups + ln_rgdppc_lag + lpopl + pyears + pyears2 + pyears3
m2 <- glm(form, data=dat, family=binomial(link="logit"))
m2cl <- cluster(m2, m2$y, dat$cowid[dat$grpyear%in%rownames(m2$model)])
m2cl[1] 

# # Model 3: relative distance to conflict w/o controls
form <- onset_do_flag ~ mindist_conborder_rel_now +  pyears + pyears2 + pyears3
m3 <- glm(form, data=dat, family=binomial(link="logit"))
m3cl <- cluster(m3, m3$y, dat$cowid[dat$grpyear%in%rownames(m3$model)])
m3cl[1] 

# # Model 4: relative distance to conflict + controls
form <- onset_do_flag ~ mindist_conborder_rel_now + 
  status_excl + family_downgraded2 + rbal + rbal2 +  warhist + ln_mindist_cap + 
  lnngroups + ln_rgdppc_lag + lpopl + pyears + pyears2 + pyears3
m4 <- glm(form, data=dat, family=binomial(link="logit"))
m4cl <- cluster(m4, m4$y, dat$cowid[dat$grpyear%in%rownames(m4$model)])
m4cl[1] 

# # Model 5: rel. dist. to conflict border * exclusion (no dispersed groups)
dat$exXmdb <- dat$status_excl * dat$mindist_conborder_rel_now # interaction effect
form <- onset_do_flag ~ mindist_conborder_rel_now + status_excl + exXmdb + 
  family_downgraded2 + rbal + rbal2 + warhist + ln_mindist_cap + 
  lnngroups + ln_rgdppc_lag + lpopl + pyears + pyears2 + pyears3
m5 <- glm(form, data=dat, family=binomial(link="logit"), subset=type!=5)
m5cl <- cluster(m5, m5$y, dat$cowid[dat$grpyear%in%rownames(m5$model)])
m5cl[1]

####################################################################################################
#### Table 2: Domestic civil war diffusion and timing of new onsets, 1946-2006.
# Model 6: observed civil war duration
form <- onset_do_flag ~ ctrfight_now + 
  pyears + pyears2 + pyears3
m6 <- glm(form, data=dat, family=binomial(link="logit"))
m6cl <- cluster(m6, m6$y, dat$cowid[dat$grpyear%in%rownames(m6$model)])
m6cl[1] 


# # Model 7: observed civil war duration + controls
form <- onset_do_flag ~ ctrfight_now + 
  status_excl + family_downgraded2 + rbal + rbal2 + warhist + ln_mindist_cap +
  lnngroups + ln_rgdppc_lag + lpopl + pyears + pyears2 + pyears3
m7 <- glm(form, data=dat, family=binomial(link="logit"))
m7cl <- cluster(m7, m7$y, dat$cowid[dat$grpyear%in%rownames(m7$model)])
m7cl[1] 

# Model 8: observed civil war count
form <- onset_do_flag ~ ctrnfights + 
  pyears + pyears2 + pyears3
m8 <- glm(form, data=dat, family=binomial(link="logit"))
m8cl <- cluster(m8, m8$y, dat$cowid[dat$grpyear%in%rownames(m8$model)])
m8cl[1] 

# # Model 9: observed civil war count + controls
form <- onset_do_flag ~ ctrnfights +  
  status_excl + family_downgraded2 + rbal + rbal2 + warhist + ln_mindist_cap + 
  lnngroups + ln_rgdppc_lag + lpopl + pyears + pyears2 + pyears3
m9 <- glm(form, data=dat, family=binomial(link="logit"))
m9cl <- cluster(m9, m9$y, dat$cowid[dat$grpyear%in%rownames(m9$model)])
m9cl[1]

####################################################################################################
#### Figure 2: Difference in probability of new ethnic civil war onset between excluded and
   #           included groups as a function of relative distance to conflict (Model 5).

# set control variables to mean or mode
controls <- c(1, 0, 0, 0, 0, mean(m5$model$rbal), mean(m5$model$rbal)^2, 0, mean(m5$model$ln_mindist_cap), 
              mean(m5$model$lnngroups), mean(m5$model$ln_rgdppc_lag), mean(m5$model$lpopl), 
              mean(m5$model$pyears), mean(m5$model$pyears)^2, mean(m5$model$pyears)^3
)

# first difference excluded - included
fd5 <- prediction(coef=coef(m5), vcv=m5cl[[2]], model="logit", sim=1000, ci=90, setcontrol=controls,
                  firstd=T, setx=matrix(seq(0,.5,by=.01), ncol=1), position=2,
                  setx1=matrix(c(seq(0,.5,by=.01), rep(1,51),seq(0,.5,by=.01)), ncol=3), position1=2:4)

# plot	
xfactor=1.6
par(cex=1.25)
par(mar=c(5,4,0,0)+.1)
plot(x=seq(0,.5,by=.01), y=fd5[2,], 
     type="l", bty="l", lwd=xfactor, ylim=c(-.01, .06), 
     axes=F, xlab="Relative Distance to Nearest Conflict", ylab="Pr(CW | Excluded) - Pr(CW | Included)")
abline(h=seq(-.01,0.07,by=.005), lty=3, col="grey80")
axis(1); axis(2)
lines(x=seq(0,.5,by=.01), y=fd5[1,], lwd=xfactor, lty=2)
lines(x=seq(0,.5,by=.01), y=fd5[3,], lwd=xfactor, lty=2)
rug(jitter(m5$model$mindist_conborder_rel[m5$y==1], amount=0.01), side=1)
legend("topright", c("Mean", "90% CI"), lty=c(1,2), lwd=xfactor, bty="n")

dev.off()

####################################################################################################
#### Figure 3: Predictive accuracy diffusion variables (in-sample).
# control model
form <- onset_do_flag ~ status_excl + family_downgraded2 + rbal + rbal2 + warhist + ln_mindist_cap +
  lnngroups + ln_rgdppc_lag + lpopl + pyears + pyears2 + pyears3
m0 <- glm(form, data=dat, family=binomial(link="logit"))

# predict
m0p <- predict(m0,type=c("response"))
m5p <- predict(m5,type=c("response"))
m9p <- predict(m9,type=c("response"))

# combine predictions and observed outcomes
preds1 <- as.data.frame(cbind(m0$y, m0p, m9p))
names(preds1) <- c("onset", "p0", "p9")
preds2 <- as.data.frame(cbind(m5$y, m5p)) # different sample size 
names(preds2) <- c("onset", "p5")

# prepare ROC curves
g0 <- roc(onset ~ p0, data = preds1)
g9 <- roc(onset ~ p9, data = preds1)
g5 <- roc(onset ~ p5, data = preds2)


# plot
par(mar=c(5,4,0,0)+.1)
par(cex=1.1, lwd=1.1)
plot(g0, 
     bty="n",
     xlab="False positive rate",
     ylab="True positive rate") 
plot(g5, lty=2, add=T)
plot(g9, lty=3, add=T)
legend("right", c("Controls only", "Model 5", "Model 9"), lty=1:3, bty="n")

dev.off()

####################################################################################################
#### Supplementary Material
####################################################################################################


#### 1 Descriptive Statistics:
####################################################################################################
## Table S1: Summary statistics of main variables
stargazer(dat[,c("onset_do_flag", "overlap", "mindist_conborder_rel_now","ctrfight_now", "ctrnfights", 
				 "status_excl", "family_downgraded2", "rbal", "warhist", "ln_mindist_cap", 
				 "lnngroups", "ln_rgdppc_lag", "lpopl", "pyears")],
 		  covariate.labels=c("Ethnic Civil War Onset", "Overlap", "Rel. Conflict Distance", "Obs. Conflict Duration", "Obs. Conflict Count", 
				  			 "Excluded", "Downgraded", "Relative Size", "Past Conflicts", "Log(Capital Distance)", 
							 "Log(\\# of Groups)", "Log(GDP p.c.)", "Log(Population)",  "Peace Years"),
		  summary=T,
		  title="Summary statistics of major variables.",
  		label="tab:sum",
		  type="text"
  )

####################################################################################################
## Table S2: Cases in analysis
cases <- ddply(dat, .(countryname), summarize, 
				conflictsum=sum(onset_do_flag, na.rm=T),
				groupcount = max(ngroups, na.rm=T),
				startyear=min(year), 
				endyear=max(year))		
rownames(cases) <- cases$countryname

stargazer(cases[2:5],
		  covariate.labels=c("Countries", "\\# of Onsets", "\\# of Groups", "Start Year", "End Year"),
		  summary=F,
		  title="Cases in Analysis",
		  label="tab:cas",
		  digit.separator="",
		  font.size="small",
		  type="text"
		)

####################################################################################################
## Figure S1: Correlation matrix
corrM <- cor(dat[,c("onset_do_flag", "overlap", "mindist_conborder_rel_now", "ctrfight_now", "ctrnfights", 
					"status_excl", "family_downgraded2", "rbal", "warhist", "ln_mindist_cap", 
					"lnngroups", "ln_rgdppc_lag", "lpopl", "pyears")],
			use="complete.obs", method="pearson"
		)
rownames(corrM) <- colnames(corrM) <- c("CW Onset", "Overlap","Rel. CW Distance", "Obs. CW Duration", "Obs. CW Count",
										"Excluded", "Downgraded", "Rel. Size", "Past CWs", 
										"Log(Capital Dist.)", "# of Grps.", "Log(GDP p.c.)", 
										"Log(Population)",  "Peace Yrs")
		
par(cex=.8)
corrplot.mixed(corrM, lower="number", upper = "circle", tl.pos="lt", tl.col="black") #plot matrix
dev.off()



#### 2 Robustness Tests:
####################################################################################################
## Table S-3: group-/country-fixed effects
# # Model 1: overlap +  country & year FEs
form <- onset_do_flag ~ overlap +  
  status_excl + family_downgraded2 + rbal + rbal2 + warhist + ln_mindist_cap + 
  lnngroups + ln_rgdppc_lag + lpopl + pyears + pyears2 + pyears3 + factor(cowid) + factor(year)
sm1 <- glm(form, data=dat, family=binomial(link="logit"))
summary(sm1)

# # Model 2: rel. distance to conflict border +  country & year FEs
form <- onset_do_flag ~ mindist_conborder_rel_now +  
  status_excl + family_downgraded2 + rbal + rbal2 + warhist + ln_mindist_cap + 
  lnngroups + ln_rgdppc_lag + lpopl + pyears + pyears2 + pyears3 + factor(cowid) + factor(year)
sm2 <- glm(form, data=dat, family=binomial(link="logit"))
summary(sm2)

# # Model 3: rel. dist. to conflict border (no dispersed groups) + interaction excluded + country fes
dat$exXmdb <- dat$mindist_conborder_rel_now * dat$status_excl
form <- onset_do_flag ~ mindist_conborder_rel_now + status_excl + exXmdb + 
  family_downgraded2 + rbal + rbal2 + warhist + ln_mindist_cap + 
  lnngroups + ln_rgdppc_lag + lpopl + pyears + pyears2 + pyears3 + factor(cowid) + factor(year)
sm3 <- glm(form, data=dat, family=binomial(link="logit"), subset=type!=5)
summary(sm3)

# # Model 4: observed civil war duration + group FEs + controls
form <- onset_do_flag ~ ctrfight_now +  
  status_excl + family_downgraded2 + rbal + rbal2 + warhist + ln_mindist_cap +
  lnngroups + ln_rgdppc_lag + lpopl + pyears + pyears2 + pyears3 + factor(cowgroupid) -1
sm4 <- glm(form, data=dat, family=binomial(link="logit"))
summary(sm4)

# # Model 5: observed civil war count + group FEs + controls
form <- onset_do_flag ~ ctrnfights +  
  status_excl + family_downgraded2 + rbal + rbal2 + warhist + ln_mindist_cap +
  lnngroups + ln_rgdppc_lag + lpopl + pyears + pyears2 + pyears3 + factor(cowgroupid)
sm5 <- glm(form, data=dat, family=binomial(link="logit"))
summary(sm5)

####################################################################################################
## Table S-4: alternative conflict distance lags

# # Model 6: relative distance to civil war (2-yr lag) + observed civil war duration +  controls
form <- onset_do_flag ~ mindist_conborder_rel2 + ctrfight_now + 
  status_excl + family_downgraded2 + rbal + rbal2 +  warhist + ln_mindist_cap + 
  lnngroups + ln_rgdppc_lag + lpopl + pyears + pyears2 + pyears3
sm6 <- glm(form, data=dat, family=binomial(link="logit"))
sm6cl <- cluster(sm6, sm6$y, dat$cowid[dat$grpyear%in%rownames(sm6$model)])
sm6cl[1] 

# # Model 7: relative distance to civil war (2-yr lag) + observed civil war count +  controls
form <- onset_do_flag ~ mindist_conborder_rel2 + ctrnfights + 
  status_excl + family_downgraded2 + rbal + rbal2 + warhist + ln_mindist_cap + 
  lnngroups + ln_rgdppc_lag + lpopl + pyears + pyears2 + pyears3
sm7 <- glm(form, data=dat, family=binomial(link="logit"), subset=type!=5)
sm7cl <- cluster(sm7, sm7$y, dat$cowid[dat$grpyear%in%rownames(sm7$model)])
sm7cl[1]

# # Model 8: relative distance to civil war (5-yr lag) + observed civil war duration +  controls
form <- onset_do_flag ~ mindist_conborder_rel5 + ctrfight_now + 
  status_excl + family_downgraded2 + rbal + rbal2 +  warhist + ln_mindist_cap + 
  lnngroups + ln_rgdppc_lag + lpopl + pyears + pyears2 + pyears3
sm8 <- glm(form, data=dat, family=binomial(link="logit"))
sm8cl <- cluster(sm8, sm8$y, dat$cowid[dat$grpyear%in%rownames(sm8$model)])
sm8cl[1] 

# # Model 9: relative distance to civil war (5-yr lag) + observed civil war count +  controls
form <- onset_do_flag ~ mindist_conborder_rel5 + ctrnfights + 
  status_excl + family_downgraded2 + rbal + rbal2 + warhist + ln_mindist_cap + 
  lnngroups + ln_rgdppc_lag + lpopl +# xpolity2 + xpolity22 + 
  pyears + pyears2 + pyears3
sm9 <- glm(form, data=dat, family=binomial(link="logit"), subset=type!=5)
sm9cl <- cluster(sm9, sm9$y, dat$cowid[dat$grpyear%in%rownames(sm9$model)])
sm9cl[1]

####################################################################################################
## Table S-5: alternative conflict distance measurements

# # Model 10: relative distance to conflict center (no lag) + controls
form <- onset_do_flag ~ mindist_concenter_rel_now + 
  status_excl + family_downgraded2 + rbal + rbal2 +  warhist + ln_mindist_cap + 
  lnngroups + ln_rgdppc_lag + lpopl + pyears + pyears2 + pyears3
sm10 <- glm(form, data=dat, family=binomial(link="logit"))
sm10cl <- cluster(sm10, sm10$y, dat$cowid[dat$grpyear%in%rownames(sm10$model)])
sm10cl[1] 

# # Model 11: rel. dist. to conflict center (no lag) * exclusion (no dispersed groups)
dat$exXmdc <- dat$status_excl * dat$mindist_concenter_rel_now
form <- onset_do_flag ~ mindist_concenter_rel_now + status_excl + exXmdc + 
  family_downgraded2 + rbal + rbal2 + warhist + ln_mindist_cap + 
  lnngroups + ln_rgdppc_lag + lpopl + pyears + pyears2 + pyears3
sm11 <- glm(form, data=dat, family=binomial(link="logit"), subset=type!=5)
sm11cl <- cluster(sm11, sm11$y, dat$cowid[dat$grpyear%in%rownames(sm11$model)])
sm11cl[1]

# # Model 12: actual distance to conflict border (no lag) + controls 
form <- onset_do_flag ~ mindist_conborder + 
  status_excl + family_downgraded2 + rbal + rbal2 +  warhist + ln_mindist_cap + 
  lnngroups + ln_rgdppc_lag + lpopl + pyears + pyears2 + pyears3
sm12 <- glm(form, data=dat, family=binomial(link="logit"))
sm12cl <- cluster(sm12, sm12$y, dat$cowid[dat$grpyear%in%rownames(sm12$model)])
sm12cl[1] 

# # Model 13: actual distance to conflict center (no lag) + controls 
form <- onset_do_flag ~ mindist_concenter + status_excl +
  family_downgraded2 + rbal + rbal2 + warhist + ln_mindist_cap + 
  lnngroups + ln_rgdppc_lag + lpopl +# xpolity2 + xpolity22 + 
  pyears + pyears2 + pyears3
sm13 <- glm(form, data=dat, family=binomial(link="logit"))
sm13cl <- cluster(sm13, sm13$y, dat$cowid[dat$grpyear%in%rownames(sm13$model)])
sm13cl[1]


####################################################################################################
## Table S-6: Polity IV, observing other armed conflict durations and battle-deaths

# # Model 14: observed duration of civil wars + X-Polity + controls
form <- onset_do_flag ~ ctrfight_now +  status_excl + family_downgraded2 + 
                        rbal + rbal2 + warhist + ln_mindist_cap + 
                        lnngroups + ln_rgdppc_lag + lpopl + xpolity2 +  
                        pyears + pyears2 + pyears3
sm14 <- glm(form, data=dat, family=binomial(link="logit"))
sm14cl <- cluster(sm14, sm14$y, dat$cowid[dat$grpyear%in%rownames(sm14$model)])
sm14cl[1] 

# # Model 15: observed count of civil wars + X-Polity + controls
form <- onset_do_flag ~ ctrnfights +  status_excl + family_downgraded2 + 
                        rbal + rbal2 + warhist + ln_mindist_cap + 
                        lnngroups + ln_rgdppc_lag + lpopl + xpolity2 + 
                        pyears + pyears2 + pyears3
sm15 <- glm(form, data=dat, family=binomial(link="logit"))
sm15cl <- cluster(sm15, sm15$y, dat$cowid[dat$grpyear%in%rownames(sm15$model)])
sm15cl[1] 

# # Model 16: observed duration of interstate wars
dat$ctrfight_iw_now <- dat$ctrfight_iw * dat$ctrincidence
form <- onset_do_flag ~ ctrfight_iw_now + 
  status_excl + family_downgraded2 + rbal + rbal2 + warhist + ln_mindist_cap + 
  lnngroups + ln_rgdppc_lag + lpopl + #xpolity2 + xpolity35 + 
  pyears + pyears2 + pyears3
sm16 <- glm(form, data=dat, family=binomial(link="logit"))
sm16cl <- cluster(sm16, sm16$y, dat$cowid[dat$grpyear%in%rownames(sm16$model)])
sm16cl[1] 

# # Model 36: observed duration of non-ethnic wars
dat$ctrfight_ne_now <- dat$ctrfight_ne * dat$ctrincidence
form <- onset_do_flag ~ ctrfight_ne_now + 
  status_excl + family_downgraded2 + rbal + rbal2 + warhist + ln_mindist_cap + 
  lnngroups + ln_rgdppc_lag + lpopl + pyears + pyears2 + pyears3
sm17 <- glm(form, data=dat, family=binomial(link="logit"))
sm17cl <- cluster(sm17, sm17$y, dat$cowid[dat$grpyear%in%rownames(sm17$model)])
sm17cl[1] 

# # Model 18: observed duration of battle-deaths (5-year lag) + control
form <- onset_do_flag ~ dat$lnctrbdmax5 + 
  status_excl + family_downgraded2 + rbal + rbal2 + warhist + ln_mindist_cap + 
  lnngroups + ln_rgdppc_lag + lpopl + pyears + pyears2 + pyears3
sm18 <- glm(form, data=dat, family=binomial(link="logit"))
sm18cl <- cluster(sm18, sm18$y, dat$cowid[dat$grpyear%in%rownames(sm18$model)])
sm18cl[1]

